# -*- coding: utf-8 -*-
import scrapy
import time

class ZaobaoSpider(scrapy.Spider):
    """Crawl zaobao.com's realtime-news listing: collect every article
    link from the listing page, then scrape title/date/time/author
    fields from each article page."""

    name = "zaobao"
    allowed_domains = ["www.zaobao.com"]
    start_urls = ['http://www.zaobao.com/realtime/']
    # Class-level accumulator of every article URL seen; kept for
    # backward compatibility with any external code that reads it.
    url_list = []

    def parse(self, response):
        """Extract article links from the listing page and schedule a
        request for each one, handled by parse_info."""
        urls = response.xpath("//ul[@class='bananas']/ul/li/div/a/@href").extract()
        for url in urls:
            info_url = response.urljoin(url)
            self.url_list.append(info_url)
            # Yield one request per freshly-seen URL. The original
            # re-iterated the shared class-level url_list after the loop,
            # which would re-schedule every previously collected URL each
            # time parse() ran.
            yield scrapy.Request(info_url, callback=self.parse_info)
        # Scrapy-idiomatic logging instead of the py2-only print statement.
        self.logger.info("collected %d article urls so far", len(self.url_list))

    def parse_info(self, response):
        """Scrape a single article page into an item.

        Returns a plain dict, which Scrapy accepts as an item.
        NOTE(review): the original instantiated NewszaobaoItem without
        importing it anywhere in this file, which raises NameError at
        runtime; the dict keeps the exact same field names so any
        downstream pipeline keyed on them still works.
        """
        item = {}
        item['url_link'] = response.url
        # Field-name spellings ('titile', 'data', 'autho') are preserved
        # byte-for-byte — they must match the project's item definition.
        item['titile'] = response.xpath("//div[@class='body-content']/h1/text()").extract()
        item['data'] = response.xpath("//div[@class='body-content']/aside/span[1]/text()").extract()
        item['times'] = response.xpath("//div[@class='body-content']/aside/span[1]/em/text()").extract()
        item['autho'] = response.xpath("//div[@class='body-content']/aside/span[2]/a/text()").extract()
        return item